# ==============================================================================
# Table 1: Comparison of MTurk sample to national sample
# ==============================================================================

# --- National benchmarks (ACS, Pew, ANES, FEC) ---

# ACS 2016 1-year tables: age, education, sex, race/ethnicity, income, and
# household size.

# Turn a percentage cell into a proportion by dividing by 100.
pct_to_share <- function(pct) {
  as.numeric(pct) / 100
}

# S0101: median age. skip = 1 drops the first file line, so the columns are
# addressed by the long descriptive names used below; the value is taken from
# the first row.
acs_age_tbl <- read_csv(
  file = "data/raw_data/ACS/S0101/ACSST1Y2016.S0101-Data.csv",
  skip = 1,
  show_col_types = FALSE
)
nat_age <- as.numeric(
  acs_age_tbl$`Total!!Estimate!!SUMMARY INDICATORS!!Median age (years)`[1]
)

# S1501: share holding a bachelor's degree or higher.
acs_educ_tbl <- read_csv(
  file = "data/raw_data/ACS/S1501/ACSST1Y2016.S1501-Data.csv",
  skip = 1,
  show_col_types = FALSE
)
nat_college_plus <- pct_to_share(
  acs_educ_tbl$`Percent!!Estimate!!Percent bachelor's degree or higher`[1]
)

# DP05: sex, race, and Hispanic origin, each stored as a percentage.
acs_demo_tbl <- read_csv(
  file = "data/raw_data/ACS/DP05/ACSDP1Y2016.DP05-Data.csv",
  skip = 1,
  show_col_types = FALSE
)
nat_female <- pct_to_share(
  acs_demo_tbl$`Percent!!SEX AND AGE!!Total population!!Female`[1]
)
nat_white <- pct_to_share(
  acs_demo_tbl$`Percent!!RACE!!One race!!White`[1]
)
nat_black <- pct_to_share(
  acs_demo_tbl$`Percent!!RACE!!One race!!Black or African American`[1]
)
nat_hispanic <- pct_to_share(
  acs_demo_tbl$`Percent!!HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)`[1]
)

# DP03 and DP02 are read without skipping a line, so columns are addressed by
# their ACS variable codes and the value is taken from the second row (row 1
# is presumably the descriptive-label row -- verify against the raw files).
# NOTE(review): DP03_0062E / DP02_0015E are presumably median household income
# and average household size -- confirm against the ACS table metadata.
acs_econ_tbl <- read_csv(
  file = "data/raw_data/ACS/DP03/ACSDP1Y2016.DP03-Data.csv",
  show_col_types = FALSE
)
nat_income <- as.numeric(acs_econ_tbl$DP03_0062E[2])

acs_social_tbl <- read_csv(
  file = "data/raw_data/ACS/DP02/ACSDP1Y2016.DP02-Data.csv",
  show_col_types = FALSE
)
nat_hhsize <- as.numeric(acs_social_tbl$DP02_0015E[2])

# Pew: weighted shares for party identification and ideology.
pew <- read_sav("data/raw_data/PEW/Typology 17 public.sav")
pew_wt <- pew$weight

# Every share uses the total weight of all respondents as its denominator,
# including respondents whose party/ideology code is missing or not listed
# below.
pew_wt_total <- sum(pew_wt, na.rm = TRUE)

# Weighted share of the respondents selected by the logical vector `keep`.
# NA entries in `keep` select NA weights, which na.rm = TRUE then discards.
pew_share <- function(keep) {
  sum(pew_wt[keep], na.rm = TRUE) / pew_wt_total
}

# Party codes 1 / 2 / 3 are used as Republican / Democrat / Independent.
nat_republican <- pew_share(pew$party == 1)
nat_democrat <- pew_share(pew$party == 2)
nat_independent <- pew_share(pew$party == 3)

# Ideology codes 1-2 are pooled as conservative, 3 is moderate, and 4-5 are
# pooled as liberal.
nat_conservative <- pew_share(pew$ideo %in% c(1, 2))
nat_moderate <- pew_share(pew$ideo == 3)
nat_liberal <- pew_share(pew$ideo %in% c(4, 5))

# ANES 2016 time series: weighted turnout.
# NOTE(review): confirm against the ANES 2016 codebook that V161031 is the
# intended turnout item (1 = voted, 2 = did not vote) and that V160102 is the
# weight that belongs with it.
anes <- read_dta("data/raw_data/ANES/anes_timeseries_2016.dta")
anes_wt <- anes$V160102
anes_response <- anes$V161031

# Numerator: weight of respondents coded 1. Denominator: weight of respondents
# coded 1 or 2, so every other code is excluded from the base.
anes_wt_code1 <- sum(anes_wt[anes_response == 1], na.rm = TRUE)
anes_wt_code12 <- sum(anes_wt[anes_response %in% c(1, 2)], na.rm = TRUE)
nat_turnout <- anes_wt_code1 / anes_wt_code12

# Federal Election Commission: vote shares.
# Both candidate counts are divided by the same hard-coded vote total.
fec_total_votes <- 136669276
nat_voted_clinton <- 65853514 / fec_total_votes
nat_voted_trump <- 62984828 / fec_total_votes

# Gather every national benchmark under the name that the table-building code
# uses to look it up.
nat_parts <- list(
  age = nat_age,
  college_plus = nat_college_plus,
  female = nat_female,
  white = nat_white,
  black = nat_black,
  hispanic = nat_hispanic,
  income = nat_income,
  hhsize = nat_hhsize,
  republican = nat_republican,
  democrat = nat_democrat,
  independent = nat_independent,
  conservative = nat_conservative,
  moderate = nat_moderate,
  liberal = nat_liberal,
  voted_2016 = nat_turnout,
  voted_clinton = nat_voted_clinton,
  voted_trump = nat_voted_trump
)

# c() silently drops zero-length components, so a benchmark that failed to
# load would disappear from the vector and later surface as an NA table cell.
# Fail here instead, naming the offending benchmark(s).
nat_bad <- names(nat_parts)[
  lengths(nat_parts) != 1L | vapply(nat_parts, anyNA, logical(1))
]
if (length(nat_bad) > 0) {
  stop(
    "National benchmark(s) missing, NA, or not a single value: ",
    paste(nat_bad, collapse = ", "),
    call. = FALSE
  )
}

# Equivalent to c(age = nat_age, college_plus = nat_college_plus, ...).
nat_vals <- do.call(c, nat_parts)

# MTurk sample statistics: age is summarised by its median, every other
# covariate by its mean; missing values are ignored in both.
med_age <- median(data$age, na.rm = TRUE)

covs <- c("female", "college_plus", "hispanic", "white", "black",
          "income", "hhsize", "republican", "democrat", "independent",
          "conservative", "moderate", "liberal", "voted_2016",
          "voted_clinton", "voted_trump")

# vapply() (rather than sapply()) guarantees a named numeric vector with
# exactly one mean per covariate and errors if any column yields anything
# else, instead of silently changing the return type.
mean_covs <- vapply(data[covs], mean, numeric(1), na.rm = TRUE)

# Same naming scheme as the national benchmarks: "age" plus the covariates.
mturk_vals <- c(age = med_age, mean_covs)

# LaTeX template for Table 1. Every body row has two %s slots: the MTurk
# value first, then the national benchmark.
table_template <- "\\caption{Comparison of MTurk sample to national sample}
\\centering
\\begin{tabular}{lcc}
\\hline
 & MTurk Sample & National \\\\ \\hline
Age & %s & %s \\\\
Female & %s & %s \\\\
Bachelor’s degree or higher & %s & %s \\\\
Hispanic & %s & %s \\\\
White & %s & %s \\\\
Black & %s & %s \\\\
Income (9 levels) & %s & \\$%s \\\\
Household size & %s & %s \\\\
Republican & %s & %s \\\\
Democrat & %s & %s \\\\
Independent & %s & %s \\\\
Conservative & %s & %s \\\\
Moderate & %s & %s \\\\
Liberal & %s & %s \\\\
Voted (2016) & %s & %s \\\\
Voted for Clinton & %s & %s \\\\
Voted for Trump & %s & %s \\\\ \\hline
\\end{tabular}"

# Statistic names in the exact order their rows appear in the template.
table_row_keys <- c("age", "female", "college_plus", "hispanic", "white",
                    "black", "income", "hhsize", "republican", "democrat",
                    "independent", "conservative", "moderate", "liberal",
                    "voted_2016", "voted_clinton", "voted_trump")

# Formatted (MTurk, national) cell pair for one row. All cells go through
# fmt(..., digits = 2) except the national income cell, which is passed to
# format_int() because the template prints it after a dollar sign.
table_row_cells <- function(key) {
  mturk_cell <- fmt(mturk_vals[key], digits = 2)
  nat_cell <- if (identical(key, "income")) {
    format_int(nat_vals[key])
  } else {
    fmt(nat_vals[key], digits = 2)
  }
  list(mturk_cell, nat_cell)
}

# Flatten the per-row pairs into one argument list and fill the template.
table_cells <- unlist(lapply(table_row_keys, table_row_cells),
                      recursive = FALSE)
table_1 <- do.call(sprintf, c(list(fmt = table_template), table_cells))

# Save the finished table as a .tex file.
writeLines(table_1, con = "code_and_output/tables/table_1.tex")

# Drop every object in the current environment except the ones listed here
# (the keep-list itself is removed too, since it is not one of its entries),
# then run the garbage collector.
keep_objs <- c("data", "fmt", "format_int", "n_sims")
rm(list = setdiff(ls(), keep_objs))
gc()
